from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Path of the text file to load; it is read with the UTF-8 encoding below.
SOURCE_PATH = r"D:\model_code\pythonkonwledge\embeddings_\sidamingzhu.txt"

# Settings handed to CharacterTextSplitter.
# NOTE(review): CharacterTextSplitter presumably splits on its separator
# first and then merges pieces up to chunk_size, so individual chunks may
# come out longer than CHUNK_SIZE when the text lacks that separator --
# confirm against the installed langchain version.
CHUNK_SIZE = 100
CHUNK_OVERLAP = 50

# Step 1: read the file through the loader.
loader = TextLoader(SOURCE_PATH, encoding="utf-8")
docs = loader.load()

# Step 2: split the loaded documents into chunks.
text_splitter = CharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
data = text_splitter.split_documents(docs)

# Step 3: report how many chunks were produced, then print each chunk's
# length followed by its text.
chunk_total = len(data)
print(chunk_total)
for doc in data:
    content = doc.page_content
    print(len(content), content)





